import os
import sys
# Silence whatever Keras writes to stderr while it is being imported.
# The null device is opened in a `with` block so its handle is closed again,
# and the real stream is restored in `finally` so a failing import cannot
# leave the process with stderr pointing at a discarded file.
stderr = sys.stderr
with open(os.devnull, 'w') as _devnull:
    sys.stderr = _devnull
    try:
        import keras
    finally:
        sys.stderr = stderr
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
import datetime as dt
from datetime import datetime
import math
from sklearn.metrics import mean_squared_error
from subprocess import check_output
from keras.models import Sequential
from keras.layers import Dense,LSTM, Dropout, GRU, Bidirectional
from keras.optimizers import Adam, SGD
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
# --- Data acquisition -------------------------------------------------------
# Ask for the symbol and the date window, then download the price history
# through pandas_datareader's 'yahoo' source.
ticker = input('Enter the ticker:')
start_date = input('Enter start date in YY-MM-DD:')
end_date = input('Enter date in YY-MM-DD:')
df = web.DataReader(
    ticker,
    data_source='yahoo',
    start=start_date,
    end=end_date,
)
df

# Copy of the frame with the index moved into an ordinary column; the code
# below reads that column as 'Date'.
df_plot = df.reset_index()
df_plot

# Labels of the columns at positions 1..4 (the column at position 0 is skipped).
cols = list(df_plot.columns[1:5])
cols

# Every timestamp present in the downloaded history.
datelist_train = df_plot['Date'].tolist()
print('All timestamps == {}'.format(len(datelist_train)))
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots as ms
def _add_range_controls(fig):
    """Give every x-axis of *fig* a range slider and the preset range buttons.

    The same 1m / 6m / YTD / 1y / all selector is used by each exploratory
    chart, so it is defined once here. Returns *fig* for convenience.
    """
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(buttons=[
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all"),
        ]),
    )
    return fig


# 2x2 grid with one price series per panel. The (row, col) placement follows
# the order of the subplot titles.
figure1 = ms(rows=2, cols=2, subplot_titles=("Open", "Close", "Low", "High"))
_price_panels = [("Open", 1, 1), ("Close", 1, 2), ("Low", 2, 1), ("High", 2, 2)]
for _column, _row, _col in _price_panels:
    # go.Line is a deprecated placeholder, not a trace type; a line chart is a
    # Scatter trace drawn in 'lines' mode.
    figure1.add_trace(
        go.Scatter(x=df_plot['Date'], y=df[_column], mode='lines'),
        row=_row,
        col=_col,
    )
figure1.update_layout(height=1000, width=1500, title_text='Price Summary',
                      showlegend=False, template='presentation')
_add_range_controls(figure1)
figure1.show()

# Traded volume over time.
fig2 = px.line(df_plot, x='Date', y='Volume',
               title='Volume of Stocks',
               template='presentation')
_add_range_controls(fig2)
fig2.show()

# Opening price against closing price.
fig3 = px.line(df_plot, x='Date', y=['Open', 'Close'],
               title='Open vs Close',
               template='presentation')
_add_range_controls(fig3)
fig3.show()

# Daily low against daily high.
fig4 = px.line(df_plot, x='Date', y=['Low', 'High'],
               title='Low vs High',
               template='presentation')
_add_range_controls(fig4)
fig4.show()
# Select the model columns and make sure they are floats (the values are
# passed through str first, then parsed as float).
df1 = df_plot[cols].astype(str)
df2 = df1.astype(float)
df2

# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the supported
# way to obtain the underlying 2-D array.
training_set = df2.to_numpy()
print('Shape of training set == {}.'.format(training_set.shape))
training_set

from sklearn.preprocessing import MinMaxScaler

# Scaler for the full matrix: every selected column is mapped onto [0, 1].
sc = MinMaxScaler(feature_range=(0,1))
training_set_scaled = sc.fit_transform(training_set)

# Second scaler fitted on the first column only, so that single-column model
# output can later be mapped back with sc_predict.inverse_transform().
# Only the fitted state is needed here, hence fit() rather than fit_transform().
sc_predict = MinMaxScaler(feature_range=(0,1))
sc_predict.fit(training_set[:,0:1])
# --- Sliding-window samples -------------------------------------------------
n_future = 60  # Number of days we want to predict into the future
n_past = 90  # Number of past days we want to use to predict the future

# Inputs use every scaled column except the last one.
feature_count = df2.shape[1] - 1
# Each value is the exclusive end row of one input window.
window_ends = range(n_past, len(training_set_scaled) - n_future + 1)

# One sample per window end: the n_past rows before it form the input, and the
# column-0 value n_future - 1 rows after it is the target (kept as a length-1
# slice).
X_train = np.array([
    training_set_scaled[end - n_past:end, 0:feature_count]
    for end in window_ends
])
y_train = np.array([
    training_set_scaled[end + n_future - 1:end + n_future, 0]
    for end in window_ends
])

print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))
import time

# --- Network definition -----------------------------------------------------
# Four stacked 50-unit LSTM layers, each followed by 20% dropout, and a single
# dense output unit. Only the last LSTM collapses the sequence dimension.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(n_past, df2.shape[1] - 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

# Report how long compilation takes.
start = time.time()
model.compile(loss='mse', optimizer='rmsprop')
print('compilation time : ', time.time() - start)

# --- Training callbacks (all of them watch the training loss) ---------------
es = EarlyStopping(
    monitor='loss',
    min_delta=1e-10,
    patience=20,
    verbose=1,
)
rlr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,  # Factor gives a New LR = LR * factor
    patience=20,
    verbose=1,
)
mcp = ModelCheckpoint(
    filepath='weights.h5',
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)

# --- Training ---------------------------------------------------------------
ep = int(input('Enter the epochs you want: '))
bs = int(input('Enter the batch size you want: '))
history = model.fit(
    X_train,
    y_train,
    epochs=ep,
    batch_size=bs,
    callbacks=[es, rlr, mcp],
)
# n_future consecutive calendar days, starting at the last training timestamp.
datelist_future = pd.date_range(
    datelist_train[-1],
    periods=n_future,
    freq='1d',
).tolist()
# The same days reduced to plain date objects.
datelist_future_ = [stamp.date() for stamp in datelist_future]

# Model output for the last n_future windows, and for every window after the
# first n_past ones.
predictions_future = model.predict(X_train[-n_future:])
predictions_train = model.predict(X_train[n_past:])
def datetime_to_timestamp(x):
    """Return *x* as a ``datetime`` at midnight of the same calendar day.

    *x* may be any object with a ``strftime`` method (date, datetime, pandas
    Timestamp). It is formatted as year-month-day and parsed back, which drops
    any time-of-day component.
    """
    day_format = '%Y-%m-%d'
    day_text = x.strftime(day_format)
    return datetime.strptime(day_text, day_format)
# Map the scaled model output back through the single-column scaler.
y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)

# The entered name labels the prediction columns and selects the actual series
# from df.
# NOTE(review): sc_predict and y_train are both built from column 0 of the
# training matrix, so this name presumably has to match that column - confirm.
cyw = input('Enter the name of the variable you want to predict: ')

# Future predictions, indexed by the generated future dates.
future_index = pd.Series(datelist_future)
PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=[cyw]).set_index(future_index)

# Training-period predictions, indexed by the matching tail of the training
# dates and normalised to midnight datetimes.
train_index = pd.Series(datelist_train[2 * n_past + n_future - 1:])
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=[cyw]).set_index(train_index)
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index.to_series().apply(datetime_to_timestamp)
PREDICTION_TRAIN  # These are all the Predictions for our Training data
PREDICTIONS_FUTURE

# Actual values of the chosen series, taken from the downloaded frame.
date_col = df.filter(['Date', cyw])
date_col

# Actuals and both prediction sets side by side, aligned on their indexes.
fin_mod = pd.concat([date_col, PREDICTION_TRAIN, PREDICTIONS_FUTURE], axis=1)
fin_mod2 = fin_mod.reset_index()
# The rename below requires the frame to have exactly four columns.
fin_mod2.columns = ['Date', 'Actual Price', 'Training Predictions', 'Future Predictions']
fin_mod2
# Root-mean-square error between the training predictions and the actual
# series. Each residual must be squared *before* averaging; squaring the mean
# of the residuals instead yields |mean error|, where positive and negative
# errors cancel out.
# The subtraction aligns the two series on their index, so dates that have no
# training prediction come out as NaN and are dropped.
_residuals = (PREDICTION_TRAIN[cyw] - date_col[cyw]).dropna()
rmse = np.sqrt(np.mean(_residuals ** 2))
rmse
# Actual prices plotted with the training and future predictions on one chart.
fig5 = px.line(
    fin_mod2,
    x='Date',
    y=['Actual Price', 'Training Predictions', 'Future Predictions'],
    title='Model',
    template='presentation',
)

# Preset zoom windows for the range selector, followed by a show-all button.
range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]
fig5.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(buttons=range_buttons),
)
fig5.show()